* Open a plain-text log for this step; an existing log file of the same name is overwritten
log using "/home/dcohen/work/202410_rents_replication/logs/01-01.log", replace text
********************************************************************************
*** SETUP
********************************************************************************

* Pin the interpreter to Stata 15 behaviour, empty the session, and raise the
* variable limit before any data are loaded
version 15
clear all
set maxvar 7500

* Global macros holding the input path (longpath) and the output path (savepath)
global longpath "/home/dcohen/soep-data/soep.v35/stata_de+en"
global savepath "/home/dcohen/work/202410_rents_replication/dat/proc-data"

********************************************************************************
*** Merge Data Sets
********************************************************************************

* Base file: individual-level data from PL, ordered by person and survey year
use "$longpath/pl.dta", clear
sort pid syear

* PPATHL (survey history): attached per person-year. Rows that exist only in
* the using file are not kept, and no _merge variable is left behind.
* Logged result: all obs in master matched
merge 1:1 pid syear using "$longpath/ppathl.dta", keep(master match) nogenerate


* PGEN (generated individual data).
* Logged result: 45 obs in master not matched (from 1984);
* those rows are kept (imputable, but likely irrelevant)
merge 1:1 pid syear using "$longpath/pgen.dta", keep(master match) nogenerate


* PBRUTTO: only geburt_v2 is taken over from this file.
* Logged result: all obs in master matched
merge 1:1 pid syear using "$longpath/pbrutto.dta", ///
      keepusing(geburt_v2) keep(master match) nogenerate

* BIOJOB (first and last job): one record per person, attached to every
* person-year. Using-only rows are not kept; no _merge variable is retained.
* Each variable is listed once in keepusing() (earlier versions repeated
* einstieg_artk and einstieg_pbio).
* Logged result: 5,061 obs in master not matched. Possibly those that haven't
* completed the biography interview yet. Auxiliary information, hence not
* critical.
merge m:1 pid using "$longpath/biojob.dta", ///
      keepusing(isco88 fjselfe fjsefsiz einstieg_artk einstieg_pbio nacefj ///
                nacelj occljob ljblue ljselfe ljsefsiz ljwhite ljcivs ///
                yearlast) ///
      keep(master match) nogenerate

* HL (rolling household data), matched on household and survey year.
* hlf0153_h is listed once (it was previously repeated).
* Logged result: 703 obs in master not matched (between 2003-2018);
* those rows are kept (imputable)
merge m:1 hid syear using "$longpath/hl.dta", ///
      keepusing(hlc0001 hlc0002 hlc0005_h hlc0043 hlf0135 hlf0153_h hlk0007 ///
                hlk0044 hlc0008_h hlf0606 hlf0632 hlc0017 hlf0107_h hlf0108_* ///
                hlf0126 hlf0127 hlf0128 hlf0129 hlf0130 hlf0131 hlf0132 ///
                hlf0526 hlf0151 hlf0152 hlf0523) ///
      keep(master match) nogenerate

* HGEN (generated household data).
* hghinc and hgcondit are listed once (they were previously repeated).
* Logged result: 703 obs in master not matched (between 2003-2018);
* those rows are kept (imputable)
merge m:1 hid syear using "$longpath/hgen.dta", ///
      keepusing(hghinc hgtyp1hh hgtyp2hh hgacquis hgmoveyr hgnorent hgowner ///
                hgreduc hgrent hgrentinfo hgreval hgroom hgrsubs hgseval ///
                hgsize hgutil hgcnstyrmax hgcnstyrmin hgcondit) ///
      keep(master match) nogenerate

* HWEALTH (asset information).
* Logged result: many obs in master not matched, because the variables are only
* available in 2002, 2007, 2012, 2017; substantial missings in 2012 (8.90%)
* and 2017 (17.44%)
merge m:1 hid syear using "$longpath/hwealth.dta", ///
      keepusing(p100h0 p010h* p001h* p011h* e100h0 e010h* e001h* w010h* ///
                w001h* w011h* n010h* n001h* n011h*) ///
      keep(master match) nogenerate

* PEQUIV: all variables, matched per person-year. Using-only rows are not
* kept and no _merge variable is retained (same for the merges below).
* Logged result: 703 obs in master not matched (between 2003-2018);
* those rows are kept (imputable)
merge 1:1 pid syear using "$longpath/pequiv.dta", keep(master match) nogenerate

* REGIONL: all variables, matched per household-year.
* Logged result: all obs in master matched
* New variables: regbez-kr_gdp_pc
merge m:1 hid syear using "$longpath/regionl.dta", keep(master match) nogenerate

* MOVEDIST: all variables, matched per person-year.
* Logged result: 225,802 obs in master not matched; all <= year 2000, thus
* won't be used anyway
* New variables: res_move - chg_zip
merge 1:1 pid syear using "$longpath/movedist.dta", keep(master match) nogenerate

* HBRUTTO (residency information): only wein_v3 is taken over.
* Logged result: all obs in master matched
merge m:1 hid syear using "$longpath/hbrutto.dta", ///
      keepusing(wein_v3) keep(master match) nogenerate

********************************************************************************
*** Recodes
********************************************************************************

*** ID Variables
* Generic identifier names: person, household, and survey year
generate long id    = pid
generate long hh_id = hid
generate year       = syear

* Wave counter: consecutive group number of the survey year
egen wave = group(year)

*** Weights
* Copies of the SOEP weighting variables under generic names
generate lweight  = pbleib
generate xweight1 = phrf
generate xweight2 = phrf0 /* Weighting factor for the original samples (wave 1) */
generate xweight3 = phrf1 /* Weighting factor without the original samples (wave 1) */

* First interview: 1 in the person-year where year equals erstbefr, else 0
generate first = (year == erstbefr)

*** Demographics
* Gender: 1 if sex == 2, 0 for other non-negative codes; negative codes and
* missing values stay missing
gen fem = (sex ==2 ) if sex >= 0 & !missing(sex)

* Birthyear/Age
* age is taken from d11101 where that is valid, otherwise derived from the
* survey year and the birth year
gen born = geburt_v2 if geburt_v2 >= 0 & !missing(geburt_v2)
gen age = d11101 if d11101 >= 0 & !missing(d11101)
replace age = year - born if missing(age)

* Age categories (missing age stays missing)
la def age5 1 "< 25" 2 "25-34" 3 "35-44" 4 "45-54" 5 ">=55"
gen age5 = .
replace age5 = 1 if age < 25
replace age5 = 2 if inrange(age, 25, 34)
replace age5 = 3 if inrange(age, 35, 44)
replace age5 = 4 if inrange(age, 45, 54)
replace age5 = 5 if age >= 55 & !missing(age)
la val age5 age5

* Migration Background: 1 if migback is 1 or 2, 0 for other valid codes
gen migr3 = inlist(migback, 1, 2) if migback >= 0  & !missing(migback)

* Bundesland (numerical codes correspond to GLES)
clonevar bl = bula

* East/West Indicator
* NOTE(review): unlike the indicators above, this has no guard for negative or
* missing sampreg, so such rows are coded 0 -- confirm that none occur.
gen east = (sampreg == 2)

* Citizenship: 1 if pgnation == 1, 0 for other valid codes.
* The !missing() guard is needed because a system-missing value satisfies
* "pgnation >= 0" in Stata; without it, person-years lacking a PGEN record
* would be coded 0 instead of missing.
gen citizen = (pgnation == 1) if pgnation >= 0 & !missing(pgnation)

* Role in household
clonevar hh_role = d11105

*** Household Composition
* Number of household members (negative codes and missings stay missing)
generate hh_mmb = d11106 if d11106 >= 0 & d11106 < .

* Number of children in the household, and their share among all members
generate hh_num_child  = d11107 if d11107 >= 0 & d11107 < .
generate hh_prop_child = hh_num_child / hh_mmb

* Household typology: hgtyp1hh with codes 4 through 8 collapsed into 4;
* all other values (including missing) are carried over unchanged
generate hh_comp = cond(inlist(hgtyp1hh, 4, 5, 6, 7, 8), 4, hgtyp1hh)
label define hh_comp 1 "Single" 2 "Partnered" 3 "Single Parent" 4 "Partnered Parent"
label values hh_comp hh_comp


*** Socio-economic Indicators
* Education: pgisced97 shifted down by one, with the two lowest valid levels
* (1 and 2) merged into category 1; pgisced97 < 1 and missings stay missing
la def edu5 1 "<= Lower Secondary" 2 "Upper Secondary" ///
            3 "Post-Secondary Non-Tertiary" 4 "Higher Vocational" ///
			5 "Higher Education"
gen edu5 = pgisced97 - 1 if pgisced97 >= 1  & !missing(pgisced97)
recode edu5 (0 = 1)
la val edu5 edu5

* Monthly Personal Labor Income (i11110 divided by 12) and its log(x + 1)
gen pinc_labor = i11110 / 12 if i11110 >= 0 & !missing(i11110)
gen log_pinc_labor = log(pinc_labor + 1)
/*
Labor earnings include wages and salary from all employment including
training, primary and secondary jobs, and self-employment, plus income
from bonuses, overtime, and profit-sharing.
Specifically labor earnings is the sum of income from primary job, secondary
job, self-employment, 13th month pay, 14th month pay, Christmas bonus
pay, holiday bonus pay, miscellaneous bonus pay, and profit-sharing income.
*/

* Monthly Personal Total Income
* The component list is stored by direct macro assignment (no "="), so it is
* not passed through a string expression.
global pinc_components "ijob1 ijob2 iself ioldy iwidy icomp iprvp iunby iunay isuby ieret imaty istuy imilt ialim iachm ichsu ispou ielse iwith i13ly i14ly ixmas iholy igray iothy itray idemy igrv1 ismp1 iciv1 iwar1 iagr1 iguv1 ivbl1 icom1 iprv1 irie1 iaus1 ilib1 ison1 igrv2 ismp2 iciv2 iwar2 iagr2 iguv2 ivbl2 icom2 iprv2 irie2 iaus2 ilib2 ison2"

* Turn the negative codes of every component into missing values
foreach X in $pinc_components {
  mvdecode `X', mv(-1 -2 -3 = . \ -5 = .a\ -8 = .b)
}

* Row sum of all components (missing only if every component is missing),
* then divided by 12
egen    pinc_gross_ttl = rowtotal($pinc_components) , missing
replace pinc_gross_ttl = pinc_gross_ttl / 12

* Monthly gross household income: sum of the personal totals within
* household and wave
egen    hinc_gross_ttl = total(pinc_gross_ttl), by(hh_id wave) missing

* Monthly personal income as a proportion of monthly household income.
* The denominator is written out in full; the abbreviated name used before
* only resolved as long as exactly one variable began with "hinc_gross".
gen prop_personal_hinc = pinc_gross_ttl / hinc_gross_ttl

* Monthly Household Net Income
gen hinc = hlc0005_h if hlc0005_h >= 0 & !missing(hlc0005_h)

* Proportion household income from rents:
* (renty - opery - lossr) relative to i11102
mvdecode renty opery lossr i11102 i11109, mv(-1 -2 -3 = . \ -5 = .a\ -8 = .b)
tempvar net_rental_income 
gen `net_rental_income' = (renty - opery - lossr)
gen prop_rentinc_hinc = `net_rental_income' / i11102

* Equivalized HH Income: net income over the square root of household size,
* plus its log(x + 1)
gen hinc_eq = hinc / sqrt(hh_mmb)
gen log_hinc_eq = log(hinc_eq + 1)

*** Labor Market Participation & Status
* Indicator for current labor market participation, derived from pglfs.
* Codes not named in a rule are copied into lm_part unchanged; non-positive
* values are then set to missing.
* NOTE(review): pglfs == 7 is not covered by any rule and therefore stays 7
* (unlabelled) in lm_part -- confirm whether it should join one of the groups.
la def lm 1 "Active" 2 "Unemployed" 3 "In Education" ///
		  4 " (Mostly) inactive" 5 "Pensioners"
recode pglfs (11 12 4 = 1) (6 = 2) (3 5  = 3) (1 8 9 10 = 4) (2 = 5), ///
             copy gen(lm_part)
replace lm_part = . if lm_part <= 0
la val lm_part lm

* Number/proportion economically active HH membership
* The helper is a strict 0/1 flag for lm_part == 1. Recoding only 2-5 to 0
* would let any other code carried over into lm_part (such as 7) enter the
* household sum at its raw value.
tempvar active
gen byte `active' = (lm_part == 1) if !missing(lm_part)
egen hh_num_ecact  = total(`active'), by(hh_id wave)
gen  hh_prop_ecact = hh_num_ecact / hh_mmb

* Current unemployment: 1 if lm_part == 2, 0 for any other non-missing status
gen unemp00 = (lm_part == 2) if !missing(lm_part)


*** Atypical employment
* Temporary Employment
* Baseline from plb0037_v1 (coded 1 when it equals 1); negative codes of
* plb0037_v1 leave temp missing
gen     temp = (plb0037_v1 == 1) if plb0037_v1 >= 0
* 2000-2005: plb0037_v2 overrides where it is 1 (-> temp 1) or 2 (-> temp 0)
replace temp = (plb0037_v2 == 1) if inlist(plb0037_v2, 1, 2) & ///
                                    syear > 1999 & syear < 2006
* From 2006: plb0037_v3 overrides; here 2 -> temp 1 and 1 -> temp 0
replace temp = (plb0037_v3 == 2) if inlist(plb0037_v3, 1, 2) & syear > 2005
* Still missing, but plb0057_h lies strictly between 0 and 6: set to 0
replace temp = 0 if missing(temp) & plb0057_h > 0 & plb0057_h < 6

/*
sort id year
bys id: replace temp = temp[_n-1] if missing(temp)
*/

* Involuntary Parttime
* Desired hours: plb0241_v1, replaced by plb0241_v2 / 10 after 1999
* (except 2016); negative results are set to missing
gen     deshour = plb0241_v1
replace deshour = plb0241_v2 if syear > 1999 & syear != 2016
replace deshour = deshour/10 if syear > 1999 & syear != 2016
replace deshour = . if deshour < 0

/*
sort id year
bys id: replace deshour = deshour[_n-1] if missing(deshour)
*/

* Actual hours: plb0176_v1, replaced by plb0176_v3 / 10 after 1989;
* negative results are set to missing
gen     realhour = plb0176_v1
replace realhour = plb0176_v3 if syear > 1989
replace realhour = realhour/10 if syear > 1989
replace realhour = . if realhour < 0

/*
sort id year
bys id: replace realhour = realhour[_n-1] if missing(realhour)
*/

* Flag: desired hours exceed actual hours by at least 5 and desired hours
* are above 35; missing whenever the difference cannot be computed
gen     diffhour  = deshour - realhour
gen     involpart = (diffhour >= 5 & deshour > 35) if !missing(diffhour)
drop    deshour realhour diffhour

* Zeit-/Leiharbeit (from 2001)
* 1 if plb0041 == 1, 0 if plb0041 == 2, missing for every other value
gen     zlwork = (plb0041 == 1) if inrange(plb0041, 1, 2)

/*
sort id year
bys id: replace zlwork = zlwork[_n-1] if missing(zlwork)
*/

* Atypical employment (any form) -> labour-market status 6.
* A row qualifies as soon as one of the three flags equals 1.
replace lm_part = 6 if temp == 1 | involpart == 1 | zlwork == 1

* Extend the value label by the new category and re-attach it
la def lm 1 "Active" 2 "Unemployed" 3 "In Education" ///
		  4 " (Mostly) inactive" 5 "Pensioners"	///
		  6 "Active, Atypical", replace
la val lm_part lm
	
					
********************************************************************************************************************************************
* OESCH CLASS SCHEMA
* Create 16-Class schema, 8-Class schema, 5-Class schema, 4-class schema
* Data: German Socio-Economic Panel (SOEP), version: CNEF equivalent file
* July 2015
* Oliver Lipps, Swiss Centre of Expertise in the Social Sciences (FORS), Lausanne
*********************************************************************************************************************************************

**** References:
**** Oesch, D. (2006a) "Coming to grips with a changing class structure" International Sociology 21 (2): 263-288.
**** Oesch, D. (2006b) "Redrawing the Class Map. Stratification and Institutions in Britain, Germany, Sweden and Switzerland", Basingstoke: Palgrave Macmillan.
**** A few minor changes were made with respect to the procedure described in these two sources (decisions taken by Oesch and Tawfik in 2013)

**** 16-Class schema constructed
  *1 Large employers
  *2 Self-employed professionals
  *3 Small business owners with employees
  *4 Small business owners without employees
  *5 Technical experts
  *6 Technicians
  *7 Skilled craft and production workers
  *8 Low-skilled production workers
  *9 Higher-grade managers and administrators
  *10 Lower-grade managers and administrators
  *11 Skilled clerks
  *12 Unskilled clerks
  *13 Socio-cultural professionals
  *14 Socio-cultural semi-professionals
  *15 Skilled service
  *16 Low-skilled service

**** 8-Class schema constructed
  *1 Self-employed professionals and large employers
  *2 Small business owners
  *3 Technical (semi-)professionals
  *4 Production workers
  *5 (Associate) managers
  *6 Clerks
  *7 Socio-cultural (semi-)professionals
  *8 Service workers

**** 5-Class schema constructed
  *1 Higher-grade service class
  *2 Lower-grade service class
  *3 Small business owners
  *4 Skilled workers
  *5 Low-skilled workers

**** 4-Class schema constructed
  *1 Upper-middle class
  *2 Lower-middle class
  *3 Skilled workers
  *4 Low-skilled workers
  
**** Variables used to construct Oesch class schema: iscoco, emplrel, emplno, iscocop, emprelp, emplnop

****************************************************************************************
* Respondent's Oesch class position
* Recode and create variables used to construct class variable for respondents
* Variables used to construct class variable for respondents: iscoco, emplrel, emplno
****************************************************************************************
/*
*** define macro LONG_FILES which is a variable for the directory which include the SOEP person long file pl,
*** and the SOEP person equivalent file pequiv. For example if these files are in d:\my_SOEP_data
*** then write the command line: "global LONG_FILES d:\my_SOEP_data\"

use pid syear e11105 using $longpath\pequiv, clear // key variable is e11105 (isco-88 at 4-digit level)
rename e11105 isco_mainjob // Current occupation of respondent, coded with isco88 4-digit
merge 1:1 pid syear using $longpath\pl, keepusing(plb0058 plb0057-plb0065) ///
 keep(match) nogen // only people who answered the survey
*/

**** Recode occupation variable (isco88 com 4-digit) for respondents
* The four special codes 66666/77777/88888/99999 are collapsed into -9.
* The frequency table is shown before and after the recode.
clonevar isco_mainjob = pgisco88
tab isco_mainjob
recode isco_mainjob (66666 77777 88888 99999=-9), copyrest
label variable isco_mainjob "Current occupation of respondent - isco88 4-digit"
tab isco_mainjob

*** Recode employment status for respondents (I)
* NOTE(review): "x > 0" is also true for a system-missing x in Stata; this
* presumes the plb* items carry negative codes rather than "." -- confirm.

gen emplrel_r=1 if plb0058>0 | plb0063_v1>0 | plb0064_v2>0 | plb0065>0 //Workers, apprentices, employees, public servants
replace emplrel_r=2 if plb0057_h>0 & plb0057_h<6 // Self-employed
replace emplrel_r=3 if plb0057_h==6 // Working family member

gen emplno_r=1 if plb0059==2 | plb0060==2 | plb0061==2 // 1-9 employees
replace emplno_r=2 if plb0059==3 | plb0060==3 | plb0061==3 // 10+ employees
* Full variable name instead of the abbreviation "emplno", which only
* resolved while no other variable started with the same letters
replace emplno_r=0 if emplno_r==. & emplrel_r!=.

* Remaining missings become code 9; afterwards all plb* variables are dropped
recode emplrel_r emplno_r (.=9)
drop plb*

**** Recode employment status for respondents (II)

tab emplrel_r
label define emplrel_r ///
1 "Employee" ///
2 "Self-employed" ///
3 "Working for own family business" ///
9 "Missing"
label value emplrel_r emplrel_r
tab emplrel_r

* Code 9 (set by the recode above) gets a label here as well
tab emplno_r
label define emplno_r ///
0 "0 employees" ///
1 "1-9 employees" ///
2 "10+ employees" ///
9 "Missing"
label value emplno_r emplno_r
tab emplno_r

* Combined self-employment indicator used by the class assignment:
* 1 = employee or unknown relation, 2 = self-employed without employees or
* working in the family business, 3 = with 1-9, 4 = with 10+ employees
gen selfem_mainjob=.
replace selfem_mainjob=1 if emplrel_r==1 | emplrel_r==9
replace selfem_mainjob=2 if emplrel_r==2 & emplno_r==0
replace selfem_mainjob=2 if emplrel_r==3
replace selfem_mainjob=3 if emplrel_r==2 & emplno_r==1
replace selfem_mainjob=4 if emplrel_r==2 & emplno_r==2
label variable selfem_mainjob "Employment status for respondants"
label define selfem_mainjob ///
1 "Not self-employed" ///
2 "Self-empl without employees" ///
3 "Self-empl with 1-9 employees" ///
4 "Self-empl with 10 or more"
label value selfem_mainjob selfem_mainjob
tab selfem_mainjob

*** Recode self-employment for years 2013- 
*** Note: plb0057_h-plb0065 not recorded, but information available in pgstib
replace selfem_mainjob=2 if inlist(pgstib, 411, 421, 431) & ///
                            inrange(year, 2013, 2018)
replace selfem_mainjob=3 if inlist(pgstib, 412, 422, 432) & ///
                            inrange(year, 2013, 2018)
replace selfem_mainjob=4 if inlist(pgstib, 413, 423, 433) & ///
                            inrange(year, 2013, 2018)

*******************************************************************************
** Create Oesch class schema for respondents
*******************************************************************************
* Every person-year starts at -9. Classes are assigned in ascending order, so
* a later class overrides an earlier one wherever their conditions overlap.
* Whatever is still -9 at the end becomes missing.

gen class16_r = -9

* Large employers (1)
replace class16_r = 1 if selfem_mainjob == 4

* Self-employed professionals (2)
replace class16_r = 2 if inlist(selfem_mainjob, 2, 3) & ///
    (inrange(isco_mainjob, 2000, 2229) | inrange(isco_mainjob, 2300, 2470))

* Small business owners with employees (3)
replace class16_r = 3 if selfem_mainjob == 3 & ///
    (inrange(isco_mainjob, 1000, 1999) | inrange(isco_mainjob, 3000, 9333) | ///
     isco_mainjob == 2230)

* Small business owners without employees (4)
replace class16_r = 4 if selfem_mainjob == 2 & ///
    (inrange(isco_mainjob, 1000, 1999) | inrange(isco_mainjob, 3000, 9333) | ///
     isco_mainjob == 2230)

* Technical experts (5)
replace class16_r = 5 if selfem_mainjob == 1 & inrange(isco_mainjob, 2100, 2213)

* Technicians (6)
replace class16_r = 6 if selfem_mainjob == 1 & ///
    (inrange(isco_mainjob, 3100, 3152) | inrange(isco_mainjob, 3210, 3213) | ///
     isco_mainjob == 3434)

* Skilled craft and production workers (7)
replace class16_r = 7 if selfem_mainjob == 1 & ///
    (inrange(isco_mainjob, 6000, 7442) | inrange(isco_mainjob, 8310, 8312) | ///
     inrange(isco_mainjob, 8324, 8330) | inrange(isco_mainjob, 8332, 8340))

* Low-skilled production workers (8)
replace class16_r = 8 if selfem_mainjob == 1 & ///
    (inrange(isco_mainjob, 8000, 8300) | inrange(isco_mainjob, 8320, 8321) | ///
     isco_mainjob == 8331              | inrange(isco_mainjob, 9153, 9333))

* Higher-grade managers and administrators (9)
replace class16_r = 9 if selfem_mainjob == 1 & ///
    (inrange(isco_mainjob, 1000, 1239) | inrange(isco_mainjob, 2400, 2429) | ///
     inlist(isco_mainjob, 2441, 2470))

* Lower-grade managers and administrators (10)
replace class16_r = 10 if selfem_mainjob == 1 & ///
    (inrange(isco_mainjob, 1300, 1319) | inrange(isco_mainjob, 3400, 3433) | ///
     inrange(isco_mainjob, 3440, 3450))

* Skilled clerks (11)
replace class16_r = 11 if selfem_mainjob == 1 & ///
    (inrange(isco_mainjob, 4000, 4112) | inrange(isco_mainjob, 4114, 4210) | ///
     inrange(isco_mainjob, 4212, 4222))

* Low-skilled clerks (12)
replace class16_r = 12 if selfem_mainjob == 1 & ///
    inlist(isco_mainjob, 4113, 4211, 4223)

* Socio-cultural professionals (13)
replace class16_r = 13 if selfem_mainjob == 1 & ///
    (inrange(isco_mainjob, 2220, 2229) | inrange(isco_mainjob, 2300, 2320) | ///
     inrange(isco_mainjob, 2340, 2359) | inrange(isco_mainjob, 2430, 2440) | ///
     inrange(isco_mainjob, 2442, 2443) | ///
     inlist(isco_mainjob, 2445, 2451, 2460))

* Socio-cultural semi-professionals (14)
replace class16_r = 14 if selfem_mainjob == 1 & ///
    (inlist(isco_mainjob, 2230, 2444, 3200, 3226, 3480) | ///
     inrange(isco_mainjob, 2330, 2332) | inrange(isco_mainjob, 2446, 2450) | ///
     inrange(isco_mainjob, 2452, 2455) | inrange(isco_mainjob, 3220, 3224) | ///
     inrange(isco_mainjob, 3229, 3340) | inrange(isco_mainjob, 3460, 3472))

* Skilled service (15)
replace class16_r = 15 if selfem_mainjob == 1 & ///
    (inlist(isco_mainjob, 3225, 5122, 5143, 8323) | ///
     inrange(isco_mainjob, 3227, 3228) | inrange(isco_mainjob, 3473, 3475) | ///
     inrange(isco_mainjob, 5000, 5113) | inrange(isco_mainjob, 5131, 5132) | ///
     inrange(isco_mainjob, 5140, 5141) | inrange(isco_mainjob, 5160, 5220))

* Low-skilled service (16)
replace class16_r = 16 if selfem_mainjob == 1 & ///
    (inlist(isco_mainjob, 5142, 5149, 5230, 8322) | ///
     inrange(isco_mainjob, 5120, 5121) | inrange(isco_mainjob, 5123, 5130) | ///
     inrange(isco_mainjob, 5133, 5139) | inrange(isco_mainjob, 9100, 9152))

* Unassigned rows (-9) become missing; then label and tabulate
mvdecode class16_r, mv(-9)
label variable class16_r "Respondent's Oesch class position - 16 classes"
label define class16_r ///
1 "Large employers" ///
2 "Self-employed professionals" ///
3 "Small business owners with employees" ///
4 "Small business owners without employees" ///
5 "Technical experts" ///
6 "Technicians" ///
7 "Skilled craft and production workers" ///
8 "Low-skilled craft and production workers" ///
9 "Higher-grade managers and administrators" ///
10 "Lower-grade managers and administrators" ///
11 "Skilled clerks" ///
12 "Low-skilled clerks" ///
13 "Socio-cultural professionals" ///
14 "Socio-cultural semi-professionals" ///
15 "Skilled service" ///
16 "Low-skilled service"
label value class16_r class16_r
tab class16_r

* 8-class version: adjacent pairs of the 16 classes are merged
recode class16_r (1 2=1)(3 4=2)(5 6=3)(7 8=4)(9 10=5)(11 12=6)(13 14=7)(15 16=8), gen(class8_r)
label variable class8_r "Respondent's Oesch class position - 8 classes"
label define class8_r ///
1 "Self-employed professionals and large employers" ///
2 "Small business owners" ///
3 "Technical (semi-)professionals" ///
4 "Production workers" ///
5 "(Associate) managers" ///
6 "Clerks" ///
7 "Socio-cultural (semi-)professionals" ///
8 "Service workers"
label value class8_r class8_r
tab class8_r

* 5-class version
recode class16_r (1 2 5 9 13=1)(6 10 14=2)(3 4=3)(7 11 15=4)(8 12 16=5), gen(class5_r)
label variable class5_r "Respondent's Oesch class position - 5 classes"
label define  class5_r ///
1 "Higher-grade service class" ///
2 "Lower-grade service class" ///
3 "Small business owners" ///
4 "Skilled workers" ///
5 "Low-skilled workers"
label value class5_r class5_r
tab class5_r


* 4-class version: starts from the 5-class variable and redistributes its
* class 3 by number of employees and occupation code. The statements are
* order-dependent: later ones override earlier ones.
* isco_mainjob is written out in full; the abbreviation "isco_" used before
* only resolved while no other variable started with those letters.
* NOTE(review): rows with class5_r == 3 that match none of the conditions
* below keep the value 3, which the final recode leaves at 3 and the label
* shows as "Skilled workers" -- confirm this is intended.
global class3noemp class5_r==3 & emplno_r==0 // Oesch class 3 and 0 employee
gen class4=class5_r        // recoding to 4 classes with the following change: class5_r==3
replace class4=2 if class5_r==3 & emplno_r==1 // 1-9 employees
replace class4=1 if $class3noemp & ((isco_mainjob>1000 & isco_mainjob<1300) | ///
    (isco_mainjob>2000 & isco_mainjob<3000))
replace class4=2 if $class3noemp & ((isco_mainjob>=1300 & isco_mainjob<2000) | ///
    isco_mainjob==2331 | isco_mainjob==2446 | ///
    (isco_mainjob>=3000 & isco_mainjob<4000))
replace class4=4 if $class3noemp & (isco_mainjob==3228 | isco_mainjob==3475 | ///
    isco_mainjob>=4111)
replace class4=5 if $class3noemp & (isco_mainjob==4113 | isco_mainjob==4211 | ///
    isco_mainjob==4223 | isco_mainjob==5121 | isco_mainjob==5123 | ///
    isco_mainjob==5133 | isco_mainjob==5139 | isco_mainjob==5149 | ///
    (isco_mainjob>8000 & isco_mainjob< 8300) | isco_mainjob==8322 | ///
    isco_mainjob==8331 | (isco_mainjob>=9131 & isco_mainjob<.))
recode class4 (4=3) (5=4)            // shift the two worker classes down by one
label define class4_r ///
1 "Upper-middle class" ///
2 "Lower-middle class" ///
3 "Skilled workers" ///
4 "Low-skilled workers"
label value class4 class4_r
tab class4

* Adapted 6-Class Scheme, built from the 8-class variable
* (class8_r written out in full instead of the abbreviation "class8")
la def class6 1 "Employers & Self-Employed Professionals" ///
              2 "Petite Bourgeoisie" ///
			  3 "Skilled Technical & Organizational" ///
			  4 "Socio-cultural (Semi-)Professionals" ///
			  5 "Blue Collar Workers" ///
			  6 "Lower Office & Service Functionaries"

gen class6_r = class8_r
recode class6_r (1=1) (2=2) (3 5=3) (4=5) (6 8=6) (7=4)
la val class6_r class6

* Cascade replace class where missing: within each person, a missing value
* takes over the value of the preceding observation. The year ordering is
* stated in the by-prefix itself, so the fill does not depend on how the data
* happened to be sorted beforehand.
foreach X in class16_r class8_r class5_r class4 class6_r {
  bysort id (year): replace `X' = `X'[_n-1] if missing(`X') 
}

* Adapted 4-Class Scheme
* Derived from the 6-class variable; where that is missing, the class is
* filled from occljob together with ljsefsiz / ljwhite / ljcivs. Each fallback
* only touches rows that are still missing.
la def myclass4 1 "Petite Bourgeoisie" ///
			    2 "Skilled Professionals" ///
			    3 "Blue Collar Workers" ///
			    4 "Lower Office & Service Functionaries"
gen myclass4_r = class6_r
recode myclass4_r (2 = 1) (1 3 4 = 2) (5 = 3) (6 = 4) 		
replace myclass4_r = 1 if occljob == 2 & ///
                          inlist(ljsefsiz, 10, 11, 12, 30) & ///
						  missing(myclass4_r)
replace myclass4_r = 2 if occljob == 2 & ///
                          inlist(ljsefsiz, 20, 40) & ///
						  missing(myclass4_r)
replace myclass4_r = 2 if (occljob == 3 & ///
                          inlist(ljwhite, 30, 40, 50) | /// 
						  occljob == 4 & ///
                          inlist(ljcivs, 30, 40)) & ///
						  missing(myclass4_r)
replace myclass4_r = 3 if occljob == 1  & ///
						  missing(myclass4_r)
replace myclass4_r = 4 if (occljob == 3 & ///
                          inlist(ljwhite, 10, 20, 21, 22) | /// 
						  occljob == 4 & ///
                          inlist(ljcivs, 10, 20)) & ///
						  missing(myclass4_r)	  
la val myclass4_r myclass4


*** Assign class of household members
* myclass4 starts as a copy of the respondent's own class and is then filled
* from other household members where it is missing
clonevar myclass4 = myclass4_r

* Mode within multi-person households
egen hh_class = mode(myclass4_r) if hh_mmb > 1, by(hh_id year)

* In two-person households, use other person's if available
replace myclass4 = hh_class if hh_mmb == 2 & missing(myclass4)

* In multi-person household, use that of nearest neighbor by age if available.
* id is the final sort key so that members of equal age always appear in the
* same order; Stata's sort breaks ties arbitrarily, which would make the
* chosen neighbour vary between runs.
* For the first (last) member of a household the missing age gap makes the
* comparison select the following (preceding) member.
sort hh_id year age id
by hh_id year: gen nn_class = cond(age - age[_n-1] > age[_n+1] - age, ///
               myclass4_r[_n+1], myclass4_r[_n-1]) 
replace myclass4 = nn_class if hh_mmb > 2 & missing(myclass4)
drop nn_class

* Where still missing, assign household mode if available
replace myclass4 = hh_class if missing(myclass4)



*******************************************************************************
** Task groups (Kurer)
*******************************************************************************

clonevar isco = pgisco88
replace isco = . if pgisco88 < 0

* 6 Task group scheme
gen task = .
replace task = 1 if inlist(isco, 2411, 2431, 2441, 3411, 3471)
replace task = 1 if inrange(isco, 2100, 2213)
replace task = 1 if inrange(isco, 2443, 2444)
replace task = 1 if inrange(isco, 2446, 2452)
replace task = 1 if inrange(isco, 3100, 3212)
replace task = 1 if inrange(isco, 3433, 3440)
replace task = 1 if inrange(isco, 3442, 3444)

replace task = 2 if inlist(isco, 2442, 2445, 3226, 3432, 3441)
replace task = 2 if inrange(isco, 1000, 1319)
replace task = 2 if inrange(isco, 2220, 2410)
replace task = 2 if inrange(isco, 2412, 2430)
replace task = 2 if inrange(isco, 2432, 2440)
replace task = 2 if inrange(isco, 2453, 2470)
replace task = 2 if inrange(isco, 3213, 3220)
replace task = 2 if inrange(isco, 3222, 3224)
replace task = 2 if inrange(isco, 3229, 3410)
replace task = 2 if inrange(isco, 3412, 3429)
replace task = 2 if inrange(isco, 3449, 3470)
replace task = 2 if inrange(isco, 3472, 3480)

replace task = 3 if isco==4223
replace task = 3 if inrange(isco, 3430, 3431)
replace task = 3 if inrange(isco, 4000, 4195)
replace task = 3 if inrange(isco, 4210, 4215)

replace task = 4 if inlist(isco, 7124, 8340, 9120, 9133)
replace task = 4 if inrange(isco, 1, 110) 
/* departing from oesch, including 110 (armed forces). 
   this is the actual intention of 1-100 */
replace task = 4 if inrange(isco, 6100, 7113)
replace task = 4 if inrange(isco, 7200, 8290)
replace task = 4 if inrange(isco, 9000, 9001)
replace task = 4 if inrange(isco, 9150, 9151)
replace task = 4 if inrange(isco, 9153, 9161)
replace task = 4 if inrange(isco, 9200, 9311)

replace task = 5 if inlist(isco, 5122, 5143, 9002, 9162)
replace task = 5 if inrange(isco, 7120, 7123)
replace task = 5 if inrange(isco, 7129, 7143)
replace task = 5 if inrange(isco, 8300, 8334)
replace task = 5 if inrange(isco, 9130, 9132)
replace task = 5 if inrange(isco, 9140, 9142)
replace task = 5 if inrange(isco, 9312, 9313)

replace task = 6 if inlist(isco, 3221, 3225, 4200, 9152)
replace task = 6 if inrange(isco, 3227, 3228)
replace task = 6 if inrange(isco, 4220, 4222)
replace task = 6 if inrange(isco, 5000, 5121)
replace task = 6 if inrange(isco, 5123, 5142)
replace task = 6 if inrange(isco, 5149, 5220)
replace task = 6 if inrange(isco, 9003, 9005)
replace task = 6 if inrange(isco, 9100, 9113)
replace task = 6 if inrange(isco, 9320, 9333) 
/* departing from oesch, including 9333 (transport labourers, animal vehicles)*/

* add isco categories 2000 and 3000
* officially not defined, thus not part of oesch's categories
replace task = 1 if inlist(isco, 2000, 3000)

* 99xxer category not classified..
* whats that?? some kind of "in training/education" subgroup?
* azubis, praktikanten, vorarbeiter, gruppenleiter, sonstige arbeitskraefte o.n.t.
* o.n.t. = ohne naehere taetigkeitsangabe
* 9910, 9920, 9930, 9942 cannot be classified (could be any industry/task)
* 9950 = facharbeiter. most likely some routine labour.
replace task = 4 if isco == 9950

* 9960 = heimarbeiter. cannot be classified, could be telemarketing (service) or producing paper flowers (routine)
* 9970 / 9980 = vorarbeiter, gruppenleiter, sonstige arbeitskraefte. too broad to classify. could be any task.
/*ISCO88	Freq.	Percent	Cum.
			
[9332] Fuehrer v. v.Tieren gezogenen Fa	1	0.07	0.07
[9910] Mith. Fam.Ang. ausserhalb der La	59	4.10	4.17
[9920] Auszubildende mit (noch) nicht f	28	1.95	6.12
[9930] Praktikanten, Volontaere ohne fe	72	5.00	11.12
[9942] Sonstige Arbeitskraefte, arbeits	1	0.07	11.19
[9950] Facharbeiter o.n.T.	46	3.20	14.38
[9960] Heimarbeiter o.n.T.	76	5.28	19.67
[9970] Vorarbeiter, Gruppenleiter	164	11.40	31.06
[9980] Sonstige Arbeitskraefte o.n.T.	992	68.94	100.00
			
Total	1,439	100.00
*/

* Three-group task scheme: collapse the six task groups pairwise
* (1-2 -> 1, 3-4 -> 2, 5-6 -> 3); unclassified observations stay missing.
gen task3 = cond(inlist(task, 1, 2), 1, ///
            cond(inlist(task, 3, 4), 2, ///
            cond(inlist(task, 5, 6), 3, .)))

* Value labels for both schemes
label define task3 1 "Non-routine cognitive" 2 "Routine" 3 "Non-routine manual"
label define task6 1 "Non-routine cognitive A" 2 "Non-routine cognitive B" ///
                   3 "Routine A" 4 "Routine B" ///
                   5 "Non-routine manual A" 6 "Non-routine manual B", replace
label values task3 task3
label values task  task6

* Carry the last observed classification forward within person:
* a missing value takes the value of the preceding row of the same id.
sort id year
by id: replace task  = task[_n-1]  if missing(task)
by id: replace task3 = task3[_n-1] if missing(task3)

* The ISCO helper variables are no longer needed
drop isco_mainjob isco


*******************************************************************************
** NACE 1.1 Sectors
*******************************************************************************

* Collapse the 2-digit pgnace codes into sector groups (see value label below).
* -1 becomes system missing, -2 becomes .a; codes not listed pass through
* unchanged.
* FIX: the mining range previously read (11/14 = 14), which left code 10
* untouched. Because sector group 10 is "K: Real Estate/Business", source
* code 10 was silently counted in that group. The range now starts at 10.
recode pgnace (-1 = .) (-2 = .a) (1/5 = 1) (10/14 = 14) (15/36 = 3) ///
              (37/41 = 4) (45 = 5) (50/52 = 6) (55 = 7) (60/64 = 8) ///
              (65/67 = 9) (70/74 = 10) (75 = 11) (80 = 12) (85 = 13) ///
              (86/99 = 14), gen(nace)
la def nace 1  "A+B: Agriculture" ///
            3  "D: Manufacturing" ///
            4  "E: Energy" ///
            5  "F: Construction" ///
            6  "G: Trade" ///
            7  "H: Hotels/Restaurants" ///
            8  "I: Transport/Communication" ///
            9  "J: Financial Intermediation" ///
            10 "K: Real Estate/Business" ///
            11 "L: Public Administration" ///
            12 "M: Education" ///
            13 "N: Health/Social Work" ///
            14 "C+O+P+Q: Other (incl. Mining)"
la val nace nace

* Cascade replace nace where missing: a missing value (including .a) takes
* the value of the preceding row of the same id.
sort id year
foreach X in nace {
  bys id: replace `X' = `X'[_n-1] if missing(`X')
}


*******************************************************************************
** Behavior and attitudes
*******************************************************************************

* Party ID
* Harmonised coding (see value label "party" defined with the vote variable):
* 0 none, 1 SPD, 2 CDU/CSU, 3 FDP, 4 Greens, 5 Left, 6 far right, 7 others,
* 8 AfD. Source code -8 is stored as .b.
gen     partyid = .

* 1994-2018
replace partyid = 0  if inrange(year, 1994, 2018) & plh0011_h == 2  // none
replace partyid = 1  if inrange(year, 1994, 2018) & plh0012_h == 1  // SPD
replace partyid = 2  if inrange(year, 1994, 2018) & inlist(plh0012_h, 2, 3, 13) // CDU/CSU
replace partyid = 3  if inrange(year, 1994, 2018) & plh0012_h == 4  // FDP
replace partyid = 4  if inrange(year, 1994, 2018) & plh0012_h == 5  // Grüne
replace partyid = 5  if inrange(year, 1994, 2018) & plh0012_h == 6  // Linke
replace partyid = 6  if inrange(year, 1994, 2018) & plh0012_h == 7  // Rechte (NPD/Rep/DieRechte)
replace partyid = 8  if inrange(year, 1994, 2018) & plh0012_h == 27 // Rechte (AfD)
* FIX 1: "&" binds tighter than "|", so without parentheses the year filter
*        applied only to the first range and the other two ranges recoded
*        observations from every year.
* FIX 2: the middle range used to start at 13 and therefore overwrote the
*        CDU/CSU assignment of code 13 made above; it now starts at 14.
replace partyid = 7  if inrange(year, 1994, 2018) & ///
                        (inrange(plh0012_h,  8, 12) | ///
                         inrange(plh0012_h, 14, 26) | ///
                         inrange(plh0012_h, 28, 31))    // Others
replace partyid = .b if inrange(year, 1994, 2018) & plh0012_h == -8

* 1984-1989
replace partyid = 0  if inrange(year, 1984, 1989) & plh0011_h  == 2 // none
replace partyid = 1  if inrange(year, 1984, 1989) & plh0012_v1 == 1 // SPD
replace partyid = 2  if inrange(year, 1984, 1989) & inlist(plh0012_v1, 2, 4) // CDU/CSU
replace partyid = 3  if inrange(year, 1984, 1989) & plh0012_v1 == 5 // FDP
replace partyid = 4  if inrange(year, 1984, 1989) & plh0012_v1 == 6 // Grüne
replace partyid = 7  if inrange(year, 1984, 1989) & plh0012_v1 == 7 // Others
replace partyid = .b if inrange(year, 1984, 1989) & plh0012_v1 == -8

* 1990 (West)
replace partyid = 0  if year == 1990 & plh0011_h  == 2 // none
replace partyid = 1  if year == 1990 & plh0012_v2 == 1 // SPD
replace partyid = 2  if year == 1990 & inlist(plh0012_v2, 2, 4) // CDU/CSU
replace partyid = 3  if year == 1990 & plh0012_v2 == 5 // FDP
replace partyid = 4  if year == 1990 & plh0012_v2 == 6 // Grüne
replace partyid = 6  if year == 1990 & plh0012_v2 == 7 // Rep
replace partyid = 7  if year == 1990 & plh0012_v2 == 8 // Others
replace partyid = .b if year == 1990 & plh0012_v2 == -8

* 1991 (West)
replace partyid = 0  if year == 1991 & plh0011_h  == 2 // none
replace partyid = 1  if year == 1991 & plh0012_v3 == 1 // SPD
replace partyid = 2  if year == 1991 & plh0012_v3 == 2 // CDU/CSU
replace partyid = 3  if year == 1991 & plh0012_v3 == 3 // FDP
replace partyid = 4  if year == 1991 & plh0012_v3 == 4 // Grüne
replace partyid = 4  if year == 1991 & plh0012_v3 == 5 // B90
* NOTE(review): PDS is set to missing in 1991 but coded 5 (Left) in 1992 and
* 1993 - confirm that this asymmetry is intended.
replace partyid = .  if year == 1991 & plh0012_v3 == 6 // PDS
replace partyid = 6  if year == 1991 & plh0012_v3 == 7 // Rep
replace partyid = 7  if year == 1991 & plh0012_v3 == 8 // Others
replace partyid = .b if year == 1991 & plh0012_v3 == -8

* 1992
replace partyid = 0  if year == 1992 & plh0011_h  == 2 // none
replace partyid = 1  if year == 1992 & plh0012_v4 == 1 // SPD
replace partyid = 2  if year == 1992 & inlist(plh0012_v4, 2, 3) // CDU/CSU
replace partyid = 3  if year == 1992 & plh0012_v4 == 4 // FDP
replace partyid = 4  if year == 1992 & plh0012_v4 == 5 // Grüne
replace partyid = 4  if year == 1992 & plh0012_v4 == 6 // B90
replace partyid = 5  if year == 1992 & plh0012_v4 == 7 // PDS
replace partyid = 7  if year == 1992 & plh0012_v4 == 8 // Others
replace partyid = .b if year == 1992 & plh0012_v4 == -8

* 1993
replace partyid = 0  if year == 1993 & plh0011_h  == 2 // none
replace partyid = 1  if year == 1993 & plh0012_v5 == 1 // SPD
replace partyid = 2  if year == 1993 & inlist(plh0012_v5, 2, 3) // CDU/CSU
replace partyid = 3  if year == 1993 & plh0012_v5 == 4 // FDP
replace partyid = 4  if year == 1993 & plh0012_v5 == 5 // Grüne
replace partyid = 4  if year == 1993 & plh0012_v5 == 6 // B90
replace partyid = 5  if year == 1993 & plh0012_v5 == 7 // PDS
replace partyid = 6  if year == 1993 & plh0012_v5 == 8 // Rep
replace partyid = 7  if year == 1993 & plh0012_v5 == 9 // Others
replace partyid = .b if year == 1993 & plh0012_v5 == -8
					   
* Vote 2013
* Same party coding as partyid; 0 = did not vote / not eligible,
* source code -8 is stored as .b.
gen     vote = .
replace vote = 0  if plh0333 == 28 // didn't vote/not eligible
replace vote = 1  if plh0333 == 1  // SPD
replace vote = 2  if inlist(plh0333, 2, 3, 13) // CDU/CSU
replace vote = 3  if plh0333 == 4  // FDP
replace vote = 4  if plh0333 == 5  // Grüne
replace vote = 5  if plh0333 == 6  // Linke
replace vote = 6  if plh0333 == 7  // Rechte (NPD/Rep/DieRechte)
replace vote = 8  if plh0333 == 27 // AfD
* FIX: the middle range used to start at 13 and therefore overwrote the
*      CDU/CSU assignment of code 13 made above; it now starts at 14.
replace vote = 7  if inrange(plh0333,  8, 12) | ///
                     inrange(plh0333, 14, 26) | ///
                     inrange(plh0333, 29, 31) // Others
replace vote = .b if plh0333 == -8

* Shared value label for partyid and vote
la def party 0 "None" 1 "SPD" 2 "CDU/CSU" 3 "FDP" 4 "Green" 5 "Left" ///
             6 "Far Right" 7 "Others" 8 "AfD"
la val partyid party
la val vote  party
					  
* Attitude items: each target variable is a clonevar copy of one SOEP source
* item. Entries are "new:source" and are created in exactly this order.
local att_pairs ///
    lr_self:plh0004     /// left-right self-placement (2005, 2009, 2014)
    wr_house:plh0029    /// worries: keep housing
    wr_ecego:plh0033    /// worries: own economic
    wr_ecnat:plh0032    /// worries: national economic
    wr_crime:plh0040    /// worries: development of crime in D
    wr_jbsec:plh0042    /// worries: job security
    wr_immig:plj0046    /// worries: immigration
    stf_life:plh0182    /// satisfaction: life in general
    stf_work:plh0173    /// satisfaction: work
    stf_hinc:plh0175    /// satisfaction: HH income
    stf_pinc:plh0176    /// satisfaction: personal income
    stf_demo:plh0152_v2 /// satisfaction: democracy
    wttr_job:plh0200    /// willingness to take risk in job
    pol_int:plh0007     /// political interest
    wsa_fm1:plh0016     /// (welfare) state attitudes (1997, 2002, 2017)
    wsa_fm2:plh0017     ///
    wsa_fm3:plh0018     ///
    wsa_se1:plh0019     ///
    wsa_se2:plh0020     ///
    wsa_sk1:plh0021     ///
    wsa_sk2:plh0022     ///
    wsa_ol1:plh0023     ///
    wsa_ol2:plh0024     ///
    wsa_in1:plh0025     ///
    wsa_in2:plh0026

foreach att_pair of local att_pairs {
    * split "new:source" at the colon
    gettoken att_new att_src : att_pair, parse(":")
    local att_src : subinstr local att_src ":" ""
    clonevar `att_new' = `att_src'
}


*******************************************************************************
** Housing-related variables
*******************************************************************************

*** Raw variables
* Plain clonevar copies; entries are "new:source", created in this order.
local hou_pairs ///
    stf_larng:plj0681 /// satisfaction: living arrangements
    stf_larea:plh0156 /// satisfaction: area
    stf_dwell:plh0177 /// satisfaction: dwelling/apartment
    tie_larea:plj0043 /// ties to local area
    want_move:plj0327 /// last 2 years: looking for residence (2015, 2017)
    pref_bcty:plm0580 ///
    pref_mcty:plm0581 ///
    pref_town:plm0582 ///
    pref_cntr:plm0583
foreach hou_pair of local hou_pairs {
    gettoken hou_new hou_src : hou_pair, parse(":")
    local hou_src : subinstr local hou_src ":" ""
    clonevar `hou_new' = `hou_src'
}

*** Generated variables
* Owner/renter: 2 if hgowner == 1, otherwise 1; hgnorent == 1 overrides both
* with 0.
gen owner = cond(hgnorent == 1, 0, cond(hgowner == 1, 2, 1))
label define owner 0 "renter (no rent)" 1 "renter" 2 "owner"
label values owner owner

* "Kaltmiete" (cold rent): non-negative hgrent only, plus rent per unit of
* hgsize
clonevar cold_rent = hgrent if hgrent >= 0
gen      cold_rent_sqm = cold_rent / hgsize if hgsize >= 0

* "Warmmiete" (warm rent): cold rent plus hgutil where hgutil is not negative
clonevar warm_rent = cold_rent
replace  warm_rent = warm_rent + hgutil if hgutil >= 0
gen      warm_rent_sqm = warm_rent / hgsize if hgsize >= 0

* Rent load: rent relative to hlc0005_h, capped at 1
foreach rent_kind in cold warm {
    gen     `rent_kind'_rent_load = `rent_kind'_rent / hlc0005_h if hlc0005_h >= 0
    replace `rent_kind'_rent_load = 1 ///
            if !missing(`rent_kind'_rent_load) & `rent_kind'_rent_load > 1
}

* Household imputed rental value
/* Imputed rental value of owner-occupied housing and of renters paying
below-market rent. The source value is divided by 12. */
clonevar imputed_rent = i11105
mvdecode imputed_rent, mv(-1 -2 -3 = . \ -5 = .a\ -8 = .b)
replace  imputed_rent = imputed_rent / 12
gen      imputed_rent_per_sqm = imputed_rent / hgsize if hgsize >= 0

* Home size
gen home_size = hgsize

* Social housing: hgrsubs codes 2 and 3 become 0, -5 and -1 become missing;
* owners are flagged with -2.
* NOTE(review): the blanket mvdecode over id-moved_area_type in the
* "Recode Missing Values" section maps -2 to ".", which appears to undo this
* owner flag - confirm the intended final coding.
recode hgrsubs (2 3 = 0) (-5 -1 = .), generate(social_housing)
replace social_housing = -2 if owner == 2


*******************************************************************************
** Moving 
*******************************************************************************

* Moving history and mover typologies.
* This section is written with the default (carriage-return) delimiter and
* "///" line continuations; no "#delimit ;" switch is needed.

* Years since move-in (from the reported move-in year)
gen yrs_since_movein = syear - hgmoveyr if hgmoveyr >= 0 & !missing(hgmoveyr)

* Since when is household registered at current address?
gen hh_at_current_address = wein_v3

* Survey year of the most recent move flagged by "resmove", carried forward
* within person
gen movein_resmove = .
replace movein_resmove = syear if resmove == 1
sort id syear
by id: replace movein_resmove = movein_resmove[_n-1] ///
	if missing(movein_resmove)

* Update years since move-in based on "resmove" where missing.
* FIX: the fill-in value used to be movein_resmove itself, i.e. a calendar
* year, in a variable that otherwise holds a number of years. It is now the
* elapsed time syear - movein_resmove, so the inrange(yrs_since_movein, 0, 2)
* checks below can apply to filled-in observations as well.
replace yrs_since_movein = syear - movein_resmove if missing(yrs_since_movein)

* Year moved into current apartment
gen year_of_movein = hgmoveyr if hgmoveyr >= 0 & !missing(hgmoveyr)

* Movein before first interview
gen movein_before_entry = (year_of_movein < erstbefr) ///
	if !missing(year_of_movein)

* Residential move: resmove where it is non-negative, otherwise a fallback
* derived from the reported move-in information
gen mover_tmp = 0 if !missing(year_of_movein)
replace mover_tmp = 1 if inrange(yrs_since_movein, 0, 2) & ///
	(!inlist(hlf0107_h, -8, -5, -2) | inrange(hlf0108_h, -1, 9))
clonevar mover = resmove if resmove >= 0
replace mover = mover_tmp if missing(mover)
drop mover_tmp

* Difference between resmove year and reported year
gen diff = movein_resmove - year_of_movein if syear >= 2001

* Mover typology.
* The order of the replace statements matters: type 6 also matches a missing
* diff and is then overridden by type 7.
gen mover_type = mover
replace mover_type = 1 if mover == 1 & movein_before_entry == 1
replace mover_type = 2 if mover == 1 & movein_before_entry == 0 & ///
	!inrange(yrs_since_movein, 0, 2)
replace mover_type = 3 if mover == 1 & movein_before_entry == 0 & ///
	inrange(yrs_since_movein, 0, 2) & diff == 0
replace mover_type = 4 if mover == 1 & movein_before_entry == 0 & ///
	inrange(yrs_since_movein, 0, 2) & diff == 1
replace mover_type = 5 if mover == 1 & movein_before_entry == 0 & ///
	inrange(yrs_since_movein, 0, 2) & diff == 2
replace mover_type = 6 if mover == 1 & movein_before_entry == 0 & ///
	inrange(yrs_since_movein, 0, 2) & !inrange(diff, 0, 2)
replace mover_type = 7 if mover == 1 & movein_before_entry == 0 & ///
	inrange(yrs_since_movein, 0, 2) & missing(diff)

la def mover_type 0 "Did not move recently" ///
	1 "Moved recently, but not after joining panel" ///
	2 "resmove recorded, but no recent reported move" ///
	3 "Moved recently, matching years" ///
	4 "Moved recently, resmove +1" ///
	5 "Moved recently, resmove +2" ///
	6 "Moved recently, resmove mismatch" ///
	7 "Moved recently, resmove missing", replace
la val mover_type mover_type

* Lag in reporting move
gen lag_in_reporting_move = syear - hlf0107_h if inrange(hlf0107_h, 1900, 2019)

* Mover Typology 2 (by zip-code change; chg_zip still carries its raw
* negative codes at this point)
gen mover_type2 = mover
replace mover_type2 = 1 if mover == 1 & chg_zip == -2
replace mover_type2 = 2 if mover == 1 & chg_zip == 0
replace mover_type2 = 3 if mover == 1 & chg_zip == 1
la def mover_type2 0 "Did not move recently" ///
	1 "Moved recently, PLZ NA" ///
	2 "Moved recently, PLZ same" ///
	3 "Moved recently, PLZ changed", replace
la val mover_type2 mover_type2

* Reasons for moving (raw copy)
clonevar reason_for_moving = hlf0108_h

* Reasons for moving: costs
*   1 if the cost item (hlf0108_v11) is ticked,
*   0 if it is not ticked but hlf0108_h lies in 1-9,
*   missing otherwise.
gen reason_for_moving_costs = ///
    cond(hlf0108_v11 == 1, 1, cond(inrange(hlf0108_h, 1, 9), 0, .))

* Reasons for moving: comfort (amenities, neighborhood, environment)
*   1 if any of hlf0108_v12 / _v13 / _v14 is ticked,
*   0 if none is ticked but hlf0108_h lies in 1-9,
*   missing otherwise.
gen reason_for_moving_comfort = ///
    cond(hlf0108_v12 == 1 | hlf0108_v13 == 1 | hlf0108_v14 == 1, 1, ///
    cond(inrange(hlf0108_h, 1, 9), 0, .))


* Other considerations
clonevar contact_neighbors = hlf0152
clonevar moved_area_type = hlf0153_h

* Old vs new apartment
clonevar moved_finances = hlf0126
clonevar moved_size = hlf0127
clonevar moved_furnishings = hlf0128
clonevar moved_area = hlf0129
clonevar moved_environment = hlf0130 
clonevar moved_connections = hlf0131
clonevar moved_neighborhood = hlf0132
clonevar moved_contract = hlf0526

* Recode the negative codes to missing and reverse the scale (x -> 2 - x).
* FIX: arithmetic on an extended missing value returns system missing, so
* the unconditional reversal turned .a / .b into "." and the later niw_*
* indicators (which test for .b) could never fire for these items. The
* reversal is now restricted to non-missing values.
foreach var of varlist moved_finances-moved_contract {
  mvdecode `var', mv(-1 -2 -3 = . \ -5 = .a\ -8 = .b)
  replace `var' = `var' * (-1) + 2 if !missing(`var')
  * the value labels no longer match the reversed scale; _strip_labels is a
  * user-written command, so a failure is deliberately tolerated
  cap _strip_labels `var'
}

* Row mean over the comparison items (rowmean ignores missing items)
egen moved_overall = rowmean(moved_finances-moved_contract)

*******************************************************************************
** Assets
*******************************************************************************

* Define
* The source wealth items come in five versions (suffixes a-e, the
* "imputations" averaged below); they are copied to variables numbered 1-5.
* Naming: hom = own home, oth = other real estate, ttl = hom + oth,
* ov = overall. NOTE(review): judging by the mortgage-ratio labels below,
* _p / _m / _t presumably hold asset value / mortgage debt / net value -
* confirm against the SOEP wealth codebook.
local asset_sfx a b c d e

* Straight copies of the source items
foreach asset_spec in "hom_p p010h" "hom_m p001h" "hom_t p011h" ///
                      "oth_p e010h" "oth_m e001h" {
  local asset_stub : word 1 of `asset_spec'
  local asset_src  : word 2 of `asset_spec'
  forvalues k = 1/5 {
    local asset_s : word `k' of `asset_sfx'
    gen asset_re_`asset_stub'`k' = `asset_src'`asset_s'
  }
}

* Other real estate: _t is derived as _p minus _m
forvalues k = 1/5 {
  gen asset_re_oth_t`k' = asset_re_oth_p`k' - asset_re_oth_m`k'
}

* Total real estate = home + other (missing if either part is missing)
foreach asset_part in p m t {
  forvalues k = 1/5 {
    gen asset_re_ttl_`asset_part'`k' = ///
        asset_re_hom_`asset_part'`k' + asset_re_oth_`asset_part'`k'
  }
}

* Overall item, copied from w011h*
forvalues k = 1/5 {
  local asset_s : word `k' of `asset_sfx'
  gen asset_ov_ttl_t`k' = w011h`asset_s'
}

* Qualitative indicator
la def asset_re_type 1 "does not own real estate" ///
                     2 "mortgaged (> 50% of asset value)" ///
                     3 "mortgaged (< 50% of asset value)" ///
                     4 "not mortgaged", replace

* FIX: Stata treats missing as larger than any number, so "x > 0" and
* "ratio > .5" were true for missing values and observations without wealth
* data were classified as "mortgaged (> 50%)". All comparisons now require
* non-missing inputs.
* FIX: the temporary ratio variable is dropped after use; it previously
* survived until the save at the end of the file and was written to the
* output data set.
forvalues k = 1/5 {
  tempvar mortgage_ratio
  gen     `mortgage_ratio' = asset_re_ttl_m`k' / asset_re_ttl_p`k'
  gen     asset_re_type`k' = .
  replace asset_re_type`k' = 1 if asset_re_ttl_p`k' == 0
  replace asset_re_type`k' = 2 if asset_re_ttl_p`k' > 0 & `mortgage_ratio' > .5 & ///
                                  !missing(asset_re_ttl_p`k', `mortgage_ratio')
  replace asset_re_type`k' = 3 if asset_re_ttl_p`k' > 0 & `mortgage_ratio' <= .5 & ///
                                  !missing(asset_re_ttl_p`k', `mortgage_ratio')
  replace asset_re_type`k' = 4 if asset_re_ttl_p`k' > 0 & asset_re_ttl_m`k' == 0 & ///
                                  !missing(asset_re_ttl_p`k')
  la val  asset_re_type`k' asset_re_type
  * the indicator is only kept for these four survey years
  replace asset_re_type`k' = . if !inlist(year, 2002, 2007, 2012, 2017)
  drop `mortgage_ratio'
}

* Average across imputations
* FIX: rowtotal() summed the five versions (five times the intended level)
* and returned 0 when all five were missing, which then counted as "does not
* own real estate". rowmean() gives the average and stays missing when no
* version is available.
egen asset_re_ttl_m = rowmean(asset_re_ttl_m*)
egen asset_re_ttl_p = rowmean(asset_re_ttl_p*)
egen asset_re_ttl_t = rowmean(asset_re_ttl_t*)
egen asset_ov_ttl_t = rowmean(asset_ov_ttl_t*)

* Qualitative indicator based on the averaged values (same rules as above)
tempvar mortgage_ratio
gen     `mortgage_ratio' = asset_re_ttl_m / asset_re_ttl_p
gen     asset_re_type = .
replace asset_re_type = 1 if asset_re_ttl_p == 0
replace asset_re_type = 2 if asset_re_ttl_p > 0 & `mortgage_ratio' > .5 & ///
                             !missing(asset_re_ttl_p, `mortgage_ratio')
replace asset_re_type = 3 if asset_re_ttl_p > 0 & `mortgage_ratio' <= .5 & ///
                             !missing(asset_re_ttl_p, `mortgage_ratio')
replace asset_re_type = 4 if asset_re_ttl_p > 0 & asset_re_ttl_m == 0 & ///
                             !missing(asset_re_ttl_p)
la val  asset_re_type asset_re_type
replace asset_re_type = . if !inlist(year, 2002, 2007, 2012, 2017)
drop `mortgage_ratio'
  
  
*******************************************************************************
** Additional variables
*******************************************************************************

* Union membership: copy of plh0263_v2 with code 2 mapped to 0
gen union = cond(plh0263_v2 == 2, 0, plh0263_v2)

* Fractional unemployment (proportion of time unemployed since entering lf)
* pgexpue: time unemployed since lf entry in years; -1 becomes missing
mvdecode pgexpue, mv(-1)
* Entry year: einstieg_artk, falling back to einstieg_pbio where missing
mvdecode einstieg_artk einstieg_pbio, mv(-2 -1)
replace einstieg_artk = einstieg_pbio if missing(einstieg_artk)

* Years since first real job; negative spans (labor market entry after the
* survey year) are set to 0, missing spans stay missing
gen yrs_active = year - einstieg_artk
replace yrs_active = 0 if yrs_active < 0

* Entrants (yrs_active == 0) get 0 when lm_part == 1 (employed) and 1 when
* lm_part == 2 (unemployed); everyone else gets the ratio.
* NOTE(review): the denominator scales yrs_active by 100 - confirm that this
* matches the unit of pgexpue.
gen unemp_frac = cond(yrs_active == 0 & lm_part == 1, 0, ///
                 cond(yrs_active == 0 & lm_part == 2, 1, ///
                      pgexpue / (yrs_active * 100)))


*******************************************************************************
** Recode Missing Values
*******************************************************************************

*** Missings (standard SOEP variables)
*       -- code only item non-response to missing . (codes -1, -2, -3)
*       -- retain codes -5 (not in questionnaire version) and -8 (not in wave)
*          as .a and .b
* The two variable ranges depend on the current variable order in memory:
* "id-moved_area_type" covers everything from id up to moved_area_type, and
* "union-unemp_frac" covers union, yrs_active and unemp_frac.
* NOTE(review): the first range also contains derived variables created
* above (e.g. social_housing, diff), so legitimate values of -1/-2/-3 in
* those variables are turned into missing as well - confirm this is intended.
mvdecode id-moved_area_type union-unemp_frac, mv(-1 -2 -3 = . \ -5 = .a\ -8 = .b)
mvdecode erstbefr, mv(-1 -2 -3 = . \ -5 = .a\ -8 = .b)

*** Missings (geo-referenced SOEP variables)
** From REGIONL
* Recode (the global holds a variable range and is reused in the final keep)
global regionl_vars = "regbez-kr_gdp_pc"
mvdecode $regionl_vars, mv(-1 -2 -3 = . \ -5 = .a\ -8 = .b)

* Exclude variables missing from 2005 onward
* (mdesc is a user-written command; it only prints a missing-value report per
* survey year, and the drop list below is maintained by hand)
bys syear: mdesc $regionl_vars if year >= 2005
drop gbou ror8595 sampoint kr_population

** From MOVEDIST
* Recode (the global holds a variable range and is reused in the final keep)
global movedist_vars = "resmove-chg_zip"
mvdecode $movedist_vars, mv(-1 -2 -3 = . \ -5 = .a\ -8 = .b)


*******************************************************************************
** Weights
*******************************************************************************

* Step 1: Sample restrictions
*   keep:  citizens aged 18 or older (eligible to vote at federal level)
*   drop:  psample 16-20 (Flucht, Flucht/Familie, Migration 2),
*          observations without zip code information, pre-2002 observations
* NOTE(review): "age >= 18" is also true for a missing age - confirm that age
* is never missing at this point.
keep if citizen == 1 & age >= 18
drop if inrange(psample, 16, 20) | missing(plz) | syear < 2002
sort id syear

* Step 2: Transform HRF to unit-mean, sum-to-N weights. Equal weight for each
*         wave: within every year the weights are rescaled to sum to
*         _N / (number of distinct waves).
tempvar obs_per_wave year_total
gen weight = xweight1
quietly unique wave
gen `obs_per_wave' = _N / r(unique)
egen `year_total' = sum(weight), by(year)
replace weight = weight * `obs_per_wave' / `year_total'
* drop the helpers explicitly so that they are not written to the saved file
drop `obs_per_wave' `year_total'


*******************************************************************************
** Indicators for non-inclusion of attitudinal variables in waves
*******************************************************************************

* One 0/1 indicator per variable: 1 where the variable carries .b
* (code -8, not in wave), 0 otherwise. The list is expanded once, before any
* indicator is created.
unab niw_sources : partyid-moved_overall union unemp_frac
foreach niw_src of local niw_sources {
	generate niw_`niw_src' = (`niw_src' == .b)
}

*******************************************************************************
** Select variables
*******************************************************************************

* NOTE(review): the range ends at niw_union, so niw_unemp_frac (created after
* it) is not kept - confirm that this is intended.
keep psample pgexpue id-niw_union erstbefr $regionl_vars $movedist_vars


*******************************************************************************
** Add missing information in mover variables (using lags of KKZ and PLZ)
*******************************************************************************

* Previous-row values of zip code (plz) and county codes (kkz, kr_kkz_rek)
* within person. The "lag" is the preceding observation of the same id, which
* is not necessarily the preceding calendar year.
sort id year
gen plz_lag = .
by id: replace plz_lag = plz[_n-1]
gen kkz_lag = .
by id: replace kkz_lag = kkz[_n-1]
gen kr_kkz_rek_lag = .
by id: replace kr_kkz_rek_lag = kr_kkz_rek[_n-1]

* *_full versions start as copies of the originals and are filled where
* missing
foreach var of varlist resmove chg_kkz chg_zip {
  clonevar `var'_full = `var'
}
replace resmove_full = mover if missing(resmove_full)

* County / zip change inferred from current vs. previous value.
* FIX: a missing current value compares as "not equal" to a non-missing lag
* and used to be flagged as a change. Both the current and the previous value
* must now be observed before a change (or no change) is inferred.
replace chg_kkz_full = 0 if missing(chg_kkz_full) & ///
                            !missing(kr_kkz_rek, kr_kkz_rek_lag) & ///
                            kr_kkz_rek == kr_kkz_rek_lag
replace chg_kkz_full = 1 if missing(chg_kkz_full) & ///
                            !missing(kr_kkz_rek, kr_kkz_rek_lag) & ///
                            kr_kkz_rek != kr_kkz_rek_lag
replace chg_zip_full = 0 if missing(chg_zip_full) & ///
                            !missing(plz, plz_lag) & ///
                            plz == plz_lag
replace chg_zip_full = 1 if missing(chg_zip_full) & ///
                            !missing(plz, plz_lag) & ///
                            plz != plz_lag


*******************************************************************************
** Save
*******************************************************************************

* Write the prepared data in Stata 12 format and close the log.
* The path is quoted, like the other paths in this file, so that a save
* directory containing spaces does not break the command.
saveold "$savepath/soepv35_rents.dta", replace version(12)

log close
